getwd()

#install.packages("adaptMCMC")
#install.packages("igraph")
library('adaptMCMC') #for generating MC samples
library('igraph') #for obtaining MST
library('MASS')
#install.packages("readxl")
library('readxl')
#install.packages('ggplot2')
library(ggplot2)
#install.packages('glmnet')
library(glmnet)

source('util.R')
source('generateData.R')
#install.packages('ks')
source('mutuInfo.R')


#====================== Test the bivariate EF ====================
#generate data for test
# Coefficients used by p.log (defined right below), in the order of the terms
# it evaluates: x1^2 * x2^2, x1^2, x2^2, x1 * x2, x1, x2.
theta0 <- c(-1, -1, -1, 0.9, 0.5, 0.5)
# Evaluate the bivariate polynomial log-density at the point x = c(x1, x2).
# The six coefficients are read from the global vector theta0; the terms are
# accumulated in the order x1^2*x2^2, x1^2, x2^2, x1*x2, x1, x2.
p.log <- function(x) {
  x1 <- x[1]
  x2 <- x[2]
  total <- theta0[1] * x1^2 * x2^2
  total <- total + theta0[2] * x1^2
  total <- total + theta0[3] * x2^2
  total <- total + theta0[4] * x1 * x2
  total <- total + theta0[5] * x1
  total + theta0[6] * x2
}
# Draw n = 2000 MCMC samples (adaptMCMC) using p.log as the log-density,
# starting the chain at (0, 1) with adaptation switched on.
# NOTE(review): scale and acc.rate are presumably the initial proposal scale
# and the target acceptance rate -- confirm against the adaptMCMC docs.
samp <- MCMC(p.log, n=2000, init=c(0, 1), scale=c(1, 0.1), adapt=TRUE, acc.rate=0.3)
dat <- samp$samples
# Thin the chain: keep rows 1, 6, 11, ... (every 5th draw).
dat <- dat[seq(1,nrow(dat),5),]
# Partial autocorrelation plot of the thinned draws.
pacf(dat)
# Scatter plot of the first sampled coordinate against the second.
plot(dat[,1], dat[,2])

#test code
# Exercise the helpers on the thinned MCMC sample. est_EF_param,
# calculate_entr and cond_log_density are not defined in this file (presumably
# they come from the source() calls at the top); check their definitions for
# the exact meaning of the returned values.
theta <- est_EF_param(dat)
muInfo <- calculate_entr(theta)
cond_density12 <- cond_log_density(dat, 1, 2, theta)

# Overwrite dat with the output of genDesignMat and run CL_tree on it.
# capture.output() swallows whatever CL_tree prints, and invisible() keeps the
# captured text itself from being echoed; cl_tree is still assigned globally.
dat <- genDesignMat(1000,6,0)
invisible(capture.output (cl_tree <- CL_tree(dat) ))
# Unpack the components of the CL_tree result into global variables.
edges <- cl_tree$edges
root <- cl_tree$root
cen <- cl_tree$cen
# NOTE: this global variable shares its name with the base function rank().
rank <- cl_tree$rank


 

# Compare with random forest (and elastic net) on repeatedly generated data.
# The tree method and the random-forest baseline are currently commented out,
# so only the elastic-net accuracy is actually computed.
# NOTE: pred_by_elas and the other pred_by_* / get_tree helpers are defined
# further down in this file; they must already exist in the session when this
# loop is executed.
#install.packages('randomForest')
library(randomForest)

nrep <- 1000
acc_trs <- rep(NA_real_, nrep)
acc_rfs <- rep(NA_real_, nrep)
acc_elas <- rep(NA_real_, nrep)
for (r in seq_len(nrep)){
  # fresh train/test split for this replication
  dat <- gen_data1(0.9)

  # #my tree method
  # trees <- get_tree(dat$X_train, dat$Y_train)
  # #save(trees, dat, file = "dataset-classification/rockmines.RData")
  # #load("dataset-classification/rockmines.RData")
  # pred_tr <- pred_by_tree(trees, dat$X_test)
  # acc_tr <- mean(dat$Y_test == pred_tr)
  # message(sprintf("Accuracy of My method: %s", round(acc_tr,3)))
  # acc_trs[r] <- acc_tr

  # #RF method
  # pred_rf <- pred_by_rf(dat$X_train, factor(dat$Y_train), dat$X_test)
  # acc_rf <- mean(dat$Y_test == pred_rf)
  # message(sprintf("Accuracy of RF: %s", round(acc_rf,3)))
  # acc_rfs[r] <- acc_rf

  #Elastic net method
  pred_ela <- pred_by_elas(dat$X_train, factor(dat$Y_train), dat$X_test)
  acc_ela <- mean(dat$Y_test == pred_ela)
  # A bare sprintf() is not auto-printed inside a for loop, so the formatted
  # line has to be emitted explicitly.
  message(sprintf("Accuracy of Elas: %s", round(acc_ela,3)))
  acc_elas[r] <- acc_ela
}
# Mean accuracy per method (tree, random forest, elastic net). The first two
# entries stay NA for as long as their sections above are commented out.
c(mean(acc_trs), mean(acc_rfs), mean(acc_elas))



# Return predicted labels of X_test.
#
# Each row of X_test is assigned to the class whose tree gives it the largest
# predict_density() value (equal class priors assumed; ties go to the
# lowest-numbered class, as with which.max).
#
# trees:  non-empty list with one fitted tree per class (e.g. the result of
#         get_tree); element k is passed to predict_density().
# X_test: test samples, one per row.
# Returns the winning position in `trees` for every row of X_test.
# NOTE(review): assumes predict_density() returns one non-NA value per row.
pred_by_tree <- function(trees, X_test){
  # 1:length(trees) would iterate over c(1, 0) for an empty list and fail with
  # an opaque subscript error, so reject that case explicitly.
  if (length(trees) == 0) {
    stop("pred_by_tree: 'trees' must contain at least one fitted tree")
  }
  nt <- nrow(X_test)
  preds <- matrix(NA_real_, nrow = nt, ncol = length(trees))
  for (k in seq_along(trees)){
    preds[,k] <- predict_density(trees[[k]], X_test)
  }
  #assuming equal prior
  apply(preds, 1, which.max)
}

# Random-forest baseline: fit randomForest() on the training data and predict
# the rows of X_test.
#
# X_train, Y_train: training predictors and responses, handed to
#                   randomForest() unchanged.
# X_test:           rows to predict.
# Returns (invisibly) as.numeric() of the predictions. For a factor response
# that is the integer level code of each prediction, not the label text.
pred_by_rf <- function(X_train, Y_train, X_test){
  fitted_forest <- randomForest(X_train, Y_train)
  test_pred <- predict(fitted_forest, X_test)
  invisible(as.numeric(test_pred))
}

# Elastic-net baseline: fit a cross-validated binomial glmnet model and
# classify the rows of X_test.
#
# X_train: training predictor matrix.
# Y_train: two-class response (the caller in this file passes a factor).
# X_test:  predictor matrix to classify.
# alpha:   elastic-net mixing parameter forwarded to cv.glmnet()
#          (1 = lasso, 0 = ridge); defaults to the previously hard-coded 0.9.
# Returns a numeric vector with one entry per test row: 2 where the predicted
# response at lambda.min exceeds 0.5, otherwise 1.
# NOTE(review): this presumes the modelled probability refers to the second
# class level and that classes are coded 1/2 -- confirm against the data.
pred_by_elas <- function(X_train, Y_train, X_test, alpha = 0.9){
  elas_fit <- cv.glmnet(X_train, Y_train, alpha = alpha, family = "binomial")
  elas_prob <- predict(elas_fit, s = elas_fit$lambda.min, newx = X_test,
                       type = "response")
  # logical + 1 maps FALSE/TRUE to class codes 1/2
  as.numeric((elas_prob > 0.5) + 1)
}

# Fit one tree per class with CL_tree().
#
# X_train: training samples, one per row.
# Y_train: class labels, assumed to be coded 1..K where K is the number of
#          distinct labels (class k is selected with Y_train == k).
# Returns a list of length K; element k is CL_tree() applied to the rows of
# X_train that carry label k.
get_tree <- function(X_train, Y_train){
  nC <- length(unique(Y_train))
  # seq_len() gives an empty list when there are no labels (1:0 would iterate
  # over c(1, 0)); drop = FALSE keeps a class with a single sample as a one-row
  # matrix instead of collapsing it to a plain vector.
  lapply(seq_len(nC), function(k) {
    CL_tree(X_train[Y_train == k, , drop = FALSE])
  })
}



#======================
# Synthetic experiment: independence test based on gradient information -- nonparametric KDE

# Summary: gradient information has better power in independence tests for small n  
# not to be included in this paper due to page limits 
#install.packages('energy')
library('energy')
library('pROC')



# Independence test for two variables; returns TRUE if they are judged
# dependent (i.e. independence is rejected at level alpha).
#
# dat:        n x 2 data (one variable per column).
# type:       'F' or 'S' -> statistic MI_v2(dat, 1, 2, type), compared with a
#             resampled reference distribution (the meaning of 'F' vs 'S' is
#             defined by MI_v2);
#             'C' -> dcov.test() on the two columns (index = 1, R = 200).
# alpha:      significance level.
# rep_resamp: number of reference resamples for 'F'/'S' (default 50, the
#             previously hard-coded value).
# Returns a single unnamed logical. Stops on any other `type`.
test_indp <- function(dat, type='F', alpha=0.05, rep_resamp=50){

  n <- nrow(dat)
  if (type == 'F' || type == 'S'){

    # statistic on the observed (joint) data
    mi <- MI_v2(dat, 1, 2, type)

    # Reference distribution: draw the two columns independently (with
    # replacement) so that any dependence between them is broken. Each
    # resample has floor(n/5) rows.
    m <- floor(n / 5)
    mi_resamp <- vapply(seq_len(rep_resamp), function(k) {
      rows1 <- sample(n, m, replace = TRUE)
      rows2 <- sample(n, m, replace = TRUE)
      MI_v2(cbind(dat[rows1, 1], dat[rows2, 2]), 1, 2, type)
    }, numeric(1))
    #hist(mi_resamp)

    # names = FALSE keeps the quantile label (e.g. "95%") off the result
    reject <- (mi > quantile(mi_resamp, 1 - alpha, names = FALSE))
  }else if(type == 'C'){
    dcov_res <- dcov.test(dat[,1], dat[,2], index = 1.0, R = 200)
    reject <- (dcov_res$p.value < alpha)
  }else{
    stop(sprintf("test_indp: unsupported type '%s' (expected 'F', 'S' or 'C')", type))
  }

  reject
}

# Per-dataset score from the resampling-based independence test, to be used to
# form a ROC curve.
#
# dat:        n x 2 data (one variable per column).
# type:       'F' or 'S' -> fraction of resampled MI_v2 statistics that exceed
#             the statistic of the observed data;
#             'C' -> 1 - p.value of dcov.test() (index = 1, R = 200).
# rep_resamp: number of reference resamples for 'F'/'S' (default 50, the
#             previously hard-coded value).
# Returns a single number in [0, 1]. Stops on any other `type`.
# NOTE(review): the two branches point in opposite directions -- for 'F'/'S' a
# small value indicates dependence (p-value-like), for 'C' a large value does.
# Confirm that the ROC code downstream accounts for this.
p_value_indp <- function(dat, type, rep_resamp=50){

  n <- nrow(dat)
  if (type == 'F' || type == 'S'){

    # statistic on the observed (joint) data
    mi <- MI_v2(dat, 1, 2, type)

    #alternative 1 (slow)
    # Reference distribution: the two columns are drawn independently (with
    # replacement), floor(n/5) rows per resample.
    m <- floor(n / 5)
    mi_resamp <- vapply(seq_len(rep_resamp), function(k) {
      rows1 <- sample(n, m, replace = TRUE)
      rows2 <- sample(n, m, replace = TRUE)
      MI_v2(cbind(dat[rows1, 1], dat[rows2, 2]), 1, 2, type)
    }, numeric(1))
    #hist(mi_resamp)
    prob <- mean(mi_resamp > mi)

    #alternative 2 (fast, for power comparison only)
    #prob <- mi

  }else if(type == 'C'){
    dcov_res <- dcov.test(dat[,1], dat[,2], index = 1.0, R = 200)
    prob <- 1 - dcov_res$p.value
  }else{
    stop(sprintf("p_value_indp: unsupported type '%s' (expected 'F', 'S' or 'C')", type))
  }

  prob
}


 






